project descrition:In this file, we are using Piet Johnson data on amphibian macroparasites to determine whether we can detect parasite induced host mortality from observed parasites distribution. Particular, we are focuing on parasites RION

Question 1: Can we detect pihm from field sample using likelihood ratio test?

load in and format data

Loading in the macroparasites field data and formatting it for the LRT function

data <- read.csv("~/Dropbox/pihm_project/data/archival/necropsy20092014_all2.csv", header=TRUE)
para_data_dt <- as.data.table(data)
# Step 1:finding the total number of host and parasites for each site-year-RION combo, save it to an vector
sum_RION <- para_data_dt[,list(sum_para=sum(RION), 
                              tot_host=length(speciescode)),
                         by=c("year","sitename", "speciesname")]

Test for PIHM in all year by site by speciesname distribution

We just look at Pseudacris regilla; subset the full data which only include Pseudacris

sum_regilla <- para_data_dt[which(speciesname=="Pseudacris regilla")]
# distribute the resulting data table according to different year, site and speciesname 
# make a table for the sum of parasites and host for each year, site and species
sum_regilla_rion <- sum_regilla[,list(sum_para=sum(RION), 
                                       tot_host=length(speciescode)),
                                 by=c("year","sitename", "speciesname")]

Loop through each year, site and spcies combo to see if pihm is happening for a given parasite distribution

# saving p-value from LRT analysis, NA is not converging
NA_or_p <- array(NA, dim=c(390, 1))

for (i in 1:nrow(sum_regilla_rion)){

  year_temp <- sum_regilla_rion$year[i]
  site_temp <- sum_regilla_rion$sitename[i]
  df_data <- sum_regilla[which(year==year_temp & sitename==site_temp)]

  if(sum_regilla_rion$sum_para[i]==0){
    NA_or_p[i,] <- c(NA)
  }else{
  # if not, do analysis
  result <- tryCatch({

    mu=mean(df_data$RION)
    variance = var(df_data$RION)
    result <- LRT(a=2, b=-0.15, k=mu^2/(variance-mu), data=df_data$RION, mu=mu)

    }, error = function(err){
    # change initial condition/ more iteration
    # fitdistribution function
    NA_or_p[i,] <-c(NA)
  })

  NA_or_p[i,] <- c(result[10])
  } # End if
} # End for loop

Now let’s explore the results from the previous chrunk, looking at the results in NA_or_p.

# section 2: extract data with parasites and hosts which still have NA as results
tot_NA <- sum(is.na(NA_or_p))

P_smaller_0.05<- NA_or_p <= 0.05 
tot_P_smaller <- sum(P_smaller_0.05, na.rm = TRUE)

P_greater_0.05 <- NA_or_p > 0.05
tot_P_larger   <- sum(P_greater_0.05, na.rm = TRUE)

print(list("Total Na"=tot_NA, 
           "Total probability less than or equal to 0.05"=tot_P_smaller, 
           "Total probability larger than 0.05"=tot_P_larger))
## $`Total Na`
## [1] 268
## 
## $`Total probability less than or equal to 0.05`
## [1] 2
## 
## $`Total probability larger than 0.05`
## [1] 120

Understanding the reasons for NAs in results (268 NAs in the dataset)

May due to no parasites in the dataset or there are numbers of parasites but the results are not converging. Explore the reasons that leads to the none-convergent; whether it due to statistical errors or the biological factors Step 1: extract and figure out which site is NA do not have zero parasites

# make a new column for p value or NA
sum_regilla_rion$p_value = NA_or_p

try_convergence <- sum_regilla_rion[which(sum_para!=0 & is.na(p_value)==TRUE)]
tot_distris = dim(try_convergence)[1]
print(dim(try_convergence))
## [1] 102   6

Step 2: fit a negative bionomial distrbution and find the p-value to all 102

mu_and_k_pvalue <- array(NA, dim=c(102, 3))

for (i in 1:nrow(try_convergence)) {
  year_temp <- try_convergence$year[i]
  site_temp <- try_convergence$sitename[i]
  data_temp <- sum_regilla[which(year==year_temp & sitename==site_temp)]
  hist(data_temp$RION, freq=FALSE, ylab = "Density/Probability", xlab = "number of parasites",   main = paste(year_temp, site_temp))

  fit_distris <- tryCatch({

   mu = mean(data_temp$RION)
   variance = var(data_temp$RION)
   fit_distris <- fitdistr(data_temp$RION, "negative binomial", list(mu=mu,        size=mu^2/(variance-mu)))
   fit_distris$estimate
   
    }, error = function(err){
   return(c(NA,NA,NA))
  })
  
  mu_and_k_pvalue[i,] <- c(fit_distris[1], fit_distris[2], NA)
  
  lines(dnbinom(0:max(data_temp$RION), size=fit_distris[2], mu=fit_distris[1]),col="red")
  # running chi-square test to calculate p-value
  
  table_RION <- table(data_temp$RION)
 if (all(is.na(mu_and_k_pvalue[i,]))==FALSE &
     length(levels(as.factor(table_RION))) > 1){
    #para_vals = 0:100000
    prob <- dnbinom((as.numeric(names(table_RION))), size=fit_distris[2], mu=fit_distris[1])* length(data_temp$RION)
    # table the prob along with the table_rion
    #bin_obs <- cut(para_vals, breaks = c(-1, as.numeric(names(table_RION)), 100000))
  
    p.value <- chisq.test(table_RION, prob)[3]
  mu_and_k_pvalue[i,] <- c(fit_distris[1], fit_distris[2], p.value$p.value)
  } else {
      mu_and_k_pvalue[i,] <- c(fit_distris[1], fit_distris[2], NA)
       
      }
} 
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect

mu_k_pvalue_sitename_year <- do.call(rbind, Map(data.frame,                       year=try_convergence$year , sitename=try_convergence$sitename,           mu=mu_and_k_pvalue[,1], k=mu_and_k_pvalue[,2],                             pvalue=mu_and_k_pvalue[,3]))

** compile didnt dit and did fit ** make a single table with the sitename, year, P, H, P-value from LRT, mu, k, p-value from chi-square test

# compiling results from the question 1

vals <- do.call(rbind, Map(data.frame, year=sum_regilla_rion$year, sitename=sum_regilla_rion$sitename, sum_para=sum_regilla_rion$sum_para,
tot_host=sum_regilla_rion$tot_host, NA_or_pvalue=NA_or_p))

#setkey function
# sitename_year <- cbind(mu_k_pvalue_sitename_year[,1], as.data.frame(mu_k_pvalue_sitename_year[,2]))

 # key.temp <- setkey(as.table(mu_k_pvalue_sitename_year), sitename_year)

compiled_data <- merge(vals, mu_k_pvalue_sitename_year, by.x=c("year","sitename"), by.y=c("year","sitename"), all.x = TRUE)

**Using distribution from data; applying LRT_known_a_and_b

# LRT_known_a_and_b(mu=7.5 , k=8.5754925, data = sum_regilla[which(sitename=='Boob' & year==2009)]$RION, a= 1.67, b= -0.05)
# AIC difference = 0.006

# LRT_known_a_and_b(mu=2.6 , k=0.76, data = sum_regilla[which(sitename=='GDPND014' & year==2009)]$RION, a= 1.67, b= -0.05)
# AIC difference = 0.02 

# LRT_known_a_and_b(mu=18.06 , k=5.71, data = sum_regilla[which(sitename=='Murky Bullfrog' & year==2009)]$RION, a= 1.67, b= -0.05)
# AIC difference = 0.14

# mu = mean(sum_regilla[which(sitename =='TGIF' & year == 2009)]$RION)
# variance = var(sum_regilla[which(sitename =='TGIF' & year == 2009)]$RION)
# k = mu^2/(variance-mu)
# LRT_known_a_and_b(mu = mu , k = k, data = sum_regilla[which(sitename=='TGIF' & year==2009)]$RION, a=  1.67, b= -0.05)
# AIC difference = 1.2, no conclusion but the p_value was 0.002214401 (which suggests that pihm is happening but AIC difference does not support the conclusion)

# mu = mean(sum_regilla[which(sitename =='Frog' & year == 2013)]$RION)
# variance = var(sum_regilla[which(sitename =='Frog' & year == 2013)]$RION)
# k = mu^2/(variance-mu)
# LRT_known_a_and_b(mu = mu , k = k, data = sum_regilla[which(sitename=='Frog' & year==2013)]$RION, a=  1.67, b= -0.05)
# iteraction limite maxit reached, p_value was 0.045, mu=37.6, k=1.77

# LRT_known_a_and_b(mu = 11.153846 , k = 3.7681007, data = sum_regilla[which(sitename=='TGIF' & year==2014)]$RION, a=  1.67, b= -0.05)
 # AIC difference = 0.4, no conclusion made

Question 2: Can we detect pihm from field sample when \(a\) and \(b\) are known?

** Looping through each year and site and run AIC test to get difference and integrate the results into one table with known a= 1.67 and b= -0.05

table_AIC <- array(NA, dim=c(390, 1))

for (i in 1:nrow(compiled_data)) {
  
  year_i <- compiled_data$year[i]
  site_i <- compiled_data$sitename[i]
  data_i <- sum_regilla[which(year==year_i & sitename==site_i)]
  if(is.na(compiled_data[i,5])==TRUE & is.na(compiled_data[i,6])==TRUE){
    table_AIC[i,] <- c(NA)

  } else if(is.na(compiled_data$NA_or_pvalue[i])==FALSE){
    
  mu_temp = mean(data_i$RION)
  variance = var(data_i$RION)
  k_temp = mu_temp^2/(variance-mu_temp)
  dt_AIC <- LRT_known_a_and_b(mu = mu_temp,k = k_temp, a = 1.67, b = -0.05, data = data_i$RION)
  table_AIC[i,] <- c(dt_AIC[1,1]-dt_AIC[2,1])

    } else{
  mu_temp = compiled_data$mu[i]
  k_temp = compiled_data$k[i]
  dt_AIC <- LRT_known_a_and_b(mu = mu_temp,k = k_temp, a = 1.67, b = -0.05, data = data_i$RION)
  table_AIC[i,] <- c(dt_AIC[1,1]-dt_AIC[2,1])
  }
}


compiled_withAIC <- do.call(rbind, Map(data.frame, year=compiled_data$year, sitename=compiled_data$sitename, sum_para=compiled_data$sum_para,
tot_host=compiled_data$tot_host, NA_or_pvalue=compiled_data$NA_or_pvalue, mu = compiled_data$mu, k = compiled_data$k, pvalue= compiled_data$pvalue, dt_AIC=table_AIC))

Question 3: Can we detect pihm from field sample when \(k\) is known?

LRT_k <- array(NA, dim=c(390, 1))

for (i in 1:nrow(compiled_data)){

  year_temp <- compiled_data$year[i]
  site_temp <- compiled_data$sitename[i]
  df_data <- sum_regilla[which(year==year_temp & sitename==site_temp)]

  if(compiled_data$sum_para[i]==0){
    LRT_k[i,] <- c(NA)
  }
  else{
  val_temp <- tryCatch({

    mu=mean(df_data$RION)
    val_temp <- LRT_known_k(a = 2, b = -0.15, k=1, data = df_data$RION, mu = mu)

    }, error = function(err){

   LRT_k[i,] <- c(NA)
  })

  LRT_k[i,] <- c(val_temp[10])
  }
}

compiled_withAIC_with.k <- do.call(rbind, Map(data.frame, year=compiled_data$year,     sitename=compiled_data$sitename, sum_para=compiled_data$sum_para,
tot_host=compiled_data$tot_host, NA_or_pvalue=compiled_data$NA_or_pvalue, mu = compiled_data$mu, k = compiled_data$k, pvalue= compiled_data$pvalue, dt_AIC=table_AIC, pvalue_with.k=LRT_k))

Question 4: Can we detect pihm from field sample when a finite negative binomial distribution is used?